******************************************************************************************************************
* PROPORTION PREVENTED FROM WORKING DUE TO DISABILITY (PPWD)
******************************************************************************************************************

/* SET-UP
		All of these model syntaxes have a section at the start that sets the details of the analyses that follow - the idea is that it is easier to change 
		things once than to have to do find-and-replace throughout a series of syntax files.
		We have filled these in with the variables that we used - but you will need to change this to the variables in your data.
		Obviously you also need to load your data here!
*/
// Analysis settings, held in globals so that the program and the bootstrap call below
// can refer to them by name. These are the same for all the example syntax files.
global empvar				"rworknew"				// the binary employment variable (1=working, 0=non-working)
global countryvar			"country"				// the categorical variable for country, with each country denoted by a value
global disvar				"llsiH"					// the binary self-reported general disability variable (1 for has a disability, 0 for does not)
global pweight				"rwtresp2"				// the probability weight used for this survey (read directly as ${pweight} inside disempBS)
global controls				"i.ragey i.rmale" 		// control variables, written in factor-variable notation
// NOTE(review): ${user} is not defined in this section - it must already be set before this line runs.
global outputdir 			"${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\ELSA-SHARE-HRS\Outputs"	// the file location to save the output tables to
/* Setting the analysis sample (needs to be done from the outset) 
   - these are the ages and countries used in our analysis - you should set this to whatever you are using. 
   Note that missing data should be dropped before running bootstrapping. */
// Keep respondents aged 50 or over and under 70
keep if (ragey>=50 & ragey<70) 
// Keep one survey year per country: 2015 for countries 19/23/29/34/35, 2014 for country 61,
// 2010 for country 51, and 2013 for countries not in the list 19/23/34/35.
// NOTE(review): country 29 is in the 2015 list but is not excluded from the 2013 clause, and
// countries 51 and 61 are not excluded from the 2013 clause either - if those countries have
// 2013 records they are kept as well. Confirm this is intended.
keep if (year==2015 & inlist(country,19,23,29,34,35)) | ( (year==2013 & !inlist(country,19,23,34,35)) | (country==61 & year==2014) | (country==51 & year==2010) )	
// Drop rows with missing employment, age or sex; rows with a missing disability variable are
// also dropped, except in countries 51 and 61 where they are retained.
drop if missing(${empvar}, ragey, rmale) | (missing(${disvar}) & !inlist(country,51,61) )
// Final things
// Declare the survey design (probability weights, stratified by country) so that the svy prefix can be used below
svyset [pw=${pweight}], strata(country)		// for some commands, it's easier to use the svy prefix than to set weights with [pweight=${pweight}]

*__________________________________________________________________________________________________
* 
**# DEFINING THE PROGRAM TO RUN BELOW
*__________________________________________________________________________________________________

capture program drop disempBS
program define disempBS, eclass
		/* disempBS - e-class program designed to be called by -bootstrap-.

		   Options
		     disvar(name)      binary disability variable
		     empvar(name)      binary employment variable
		     countryvar(name)  categorical country variable
		     controls(varlist) optional control variables (factor-variable notation allowed)
		     [if]              optional sample restriction passed to both logit models

		   Globals read (must be set by the caller before running)
		     ${pweight}        probability weight variable used in both logit models
		     ${controlsmeans}  the at() specification passed to both -margins- calls

		   What it does
		     1. Logit of empvar on disvar##countryvar (+ controls), then margins of disvar over country.
		     2. Logit of disvar on countryvar (+ controls), then margins over country.
		     3. For each country in the estimation sample of model 1, builds:
		          disemp = margin for disvar==1
		          disgap = margin for disvar==0 minus margin for disvar==1
		          dis    = margin from the disability model
		          ppfw   = dis * disgap
		     4. Posts the row vector [n, dis, disemp, disgap, ppfw] as e(b), with each block
		        carrying its own equation name, and sets e(cmd) to "bootstrap".

		   Side effects: leaves the (non-temporary) matrices n, reg_disemp, reg_dis, disemp,
		   disgap, dis and ppfw in memory, overwriting any existing matrices of those names.
		*/
		syntax [varlist] [if], disvar(name) empvar(name) countryvar(name) [controls(varlist fv)] 

		// Fail early with a clear message: both logit commands below use [pw=${pweight}],
		// which is a syntax error if the global has not been set.
		if "${pweight}"=="" {
			display as error "disempBS: global pweight must be set to the probability weight variable"
			exit 198
		}
		
		****************************************************************************************************
		* ESTIMATION COMMANDS
		****************************************************************************************************

		// Command for DIS EMP GAP
		logit `empvar' ib(1).`disvar'##i.`countryvar' `controls' `if' [pw=${pweight}]	
			* Sample sizes are collected in the matrix n, which is included in the posted e(b) below
			matrix n =  `e(N)' 									// [1,1] is the number of observations in the employment model
				levelsof `countryvar' if e(sample), local(countrylist)			// list of country values present in the estimation sample
			matrix n = n , wordcount("`countrylist'")			// [1,2] is the number of distinct countries in the estimation sample
			* Margins
			margins i.`disvar' 		, over(`countryvar') 	nose noesample at(${controlsmeans}) post 	// 'nose' is because we're not using the standard error/confidence intervals - we're using the variation in the estimated value across replications
			matrix reg_disemp = e(b)

		// Command for DIS ITSELF
		// (the original referred to an extra macro `controls2' here, which -syntax- never defines and so was always empty; it has been removed)
		logit 				`disvar'    i.`countryvar' `controls'  `if' [pw=${pweight}] // optionally: , vce(cluster mergeid)
			matrix  n =  n, `e(N)'									// [1,3] is the sample size for the disability model, just as a check
			* Margins
			margins					, over(`countryvar') 	nose noesample at(${controlsmeans}) post 
			matrix reg_dis = e(b)
		
		// Labelling the matrix of sample sizes
		matrix colnames n = "obs_emp" "countries_emp" "obs_dis" // labelling the matrix
	
	
		****************************************************************************************************
		* OUTPUTTING
		****************************************************************************************************

		// GENERATING NEW VECTORS OF RESULTS
		// Each vector is (re)created on the first country and extended on later ones, so that
		// matrices left over from a previous call/replication are never appended to.
		local i = 0
		foreach country in `countrylist' 	{
			local ++i
			// Macro copy rather than "=" evaluation, so a long list of countries is never truncated as a string expression
			local out_colnames "`out_colnames'  `country'.`countryvar'"
			// Employment among those with a disability (margin for disvar==1)
			if `i'==1	matrix disemp	= 					reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#1.`disvar'")]
			else		matrix disemp	= disemp, reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#1.`disvar'")]
			// Emp gap: margin for disvar==0 minus margin for disvar==1
			if `i'==1	matrix disgap	= 					reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#0.`disvar'")]    - reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#1.`disvar'")]
			else 		matrix disgap	= disgap, reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#0.`disvar'")]    - reg_disemp[1,colnumb(reg_disemp, "`country'.`countryvar'#1.`disvar'")]
			// Absolute disability level 
			if `i'==1	matrix dis		= 			 		reg_dis[1,colnumb(reg_dis, "`country'.`countryvar'")]
			else 		matrix dis		= dis, 	  reg_dis[1,colnumb(reg_dis, "`country'.`countryvar'")]
			// PPFW (% prevented from working) = disability level * employment gap, for the country just added (column `i')
			if `i'==1	matrix ppfw		= 					dis[1,`i'] * disgap[1,`i']
			else 		matrix ppfw		= ppfw,   dis[1,`i'] * disgap[1,`i']
		}

		* Labelling the columns
		* The sample sizes are done first, and n is placed first in the list of matrices to post
		* (original author's note: n needs to be first - otherwise first results are duplicated!)
		matrix coleq	n = n
		local fullmatlist "n"
		* Now doing this for each of the results matrices: country column names, and the matrix name as equation name
		foreach output in dis disemp disgap ppfw {
			matrix colnames `output' = `out_colnames'
			matrix coleq	`output' = `output'
			local fullmatlist "`fullmatlist', `output'" 	// A complete, comma-joined list of matrices to be posted just below
		}
		
		// COMBINING VECTORS OF REGRESSION RESULTS INTO e(b)
		tempname BSoutput
		matrix `BSoutput' = `fullmatlist'
		matrix list `BSoutput'
		ereturn post `BSoutput' 
		ereturn local cmd="bootstrap"
			
end


*__________________________________________________________________________________________________
* 
**# RUNNING 'BOOTSTRAP'
*__________________________________________________________________________________________________

// Setting the mean values of control variables, to hold constant for the average marginal effects.
// This builds the global ${controlsmeans} as a list of "name=value" pairs, which disempBS passes to margins, at().
// It can't be done when setting the globals at the top, as it depends on the analysis dataset.
global controlsmeans ""
capture svy: mean ${controls} 
if _rc==0 {
	// Only read e(varlist)/e(b) when -mean- actually ran; otherwise they may be left over from an earlier command
	local col = 0								// explicit start, so an earlier local called col cannot shift the column index
	foreach word in `e(varlist)'	{
		local ++col
		local working = e(b)[1, `col']
		global controlsmeans "${controlsmeans} `word'=`working'"
	}
}
else {
	display as error "Warning: -svy: mean ${controls}- failed (return code " _rc "), so controlsmeans has been left empty"
}
dis in red "Controls are ${controls} - their means in the atspec are ${controlsmeans}"


// Running the programme
global run = 				101			// Some of the dis weights outputs are run-specific, e.g. if we recalculate by gender
global reps					"3"				 	// replications for bootstrapping, both BOOTSTRAP & SVY BOOTSTRAP (NOTE(review): 3 looks like a test value - increase for real analyses)
* The actual bootstrap command - for error-checking, add the 'noisily' option back in after strata()
* NOTE(review): ${workingdata} is not defined in this section - it must already be set before this line runs
	eststo bootstrap_all: bootstrap, nodrop reps(${reps}) seed(13062017) saving("${workingdata}/bs_run${run}.dta", replace) strata(${countryvar}) ///
		/**/ :  disempBS, countryvar(${countryvar}) empvar(${empvar}) controls(${controls}) disvar(${disvar}) 
	/* Running these results at a later date
	bstat using "${workingdata}/bs_run${run}.dta"
	*/
	estat bootstrap, bc			// Necessary to recover the main estimate			
	* Outputting the results: point estimate and bias, with the lower/upper limits of three types of CI
	esttab bootstrap_all using "${outputdir}\1_bootstrap_run${run}.csv", csv replace not ci nostar nonum nodepvars b(%5.4f) /// 	
		cells("b ci_normal[ll] ci_percentile[ll] ci_bc[ll]" "bias ci_normal[ul] ci_percentile[ul] ci_bc[ul]")					///
		addnotes("Col 2 is normal-based CI, col3 is percentile-based CI, col4 is bias-corrected CI" 							///
				 "Number of replications is ${reps}, date outputted is `c(current_date)'")

